"""Seed MinIO buckets with sample normalized text files for integration tests. Uploads synthetic normalized text corresponding to documents seeded by seed_sandbox.py. Each file is keyed by content_hash so the query API and other services can locate them in the stonks-normalized bucket. Usage: python -m tests.integration.seed_minio Environment variables: MINIO_ENDPOINT (default: minio:9000) MINIO_ACCESS_KEY (default: minioadmin) MINIO_SECRET_KEY (default: minioadmin) MINIO_SECURE (default: false) """ import os from io import BytesIO from minio import Minio BUCKET = "stonks-normalized" # --------------------------------------------------------------------------- # Sample normalized text content keyed by content_hash. # These hashes match the documents inserted by seed_sandbox.py (DOC_01–DOC_10). # We seed at least 5 files covering a mix of news, filings, and macro events. # --------------------------------------------------------------------------- NORMALIZED_TEXTS: dict[str, str] = { "hash_doc_01": ( "Apple Inc reported fourth-quarter earnings that exceeded Wall Street " "expectations, driven by stronger-than-anticipated iPhone sales across " "all major markets. Revenue for the quarter came in at $89.5 billion, " "up 6 percent year over year, while earnings per share reached $1.46.\n\n" "Services revenue continued its upward trajectory, hitting a new record " "of $22.3 billion. Management highlighted growth in Apple TV+ subscribers " "and the expanding installed base of over 2.2 billion active devices.\n\n" "Greater China revenue declined 2 percent amid a competitive smartphone " "landscape, though management expressed confidence in the region's " "long-term trajectory. Gross margin expanded to 46.2 percent, reflecting " "favorable product mix and supply chain efficiencies." ), "hash_doc_02": ( "Microsoft Corporation reported a 29 percent year-over-year increase in " "Azure cloud revenue, surpassing analyst estimates and reinforcing the " "company's position as a leading cloud infrastructure provider.\n\n" "Total Intelligent Cloud segment revenue reached $25.9 billion for the " "quarter. CEO Satya Nadella attributed the acceleration to enterprise " "adoption of AI workloads running on Azure, including OpenAI-powered " "services integrated into Microsoft 365 Copilot.\n\n" "Operating income for the segment grew 23 percent, with margins " "expanding despite increased capital expenditure on data center " "capacity. The company guided for continued double-digit Azure growth " "in the coming quarter." ), "hash_doc_03": ( "JPMorgan Chase & Co filed its annual 10-K report with the Securities " "and Exchange Commission, disclosing record full-year net income of " "$49.6 billion. The filing detailed strong performance across all major " "business lines.\n\n" "Investment banking fees rose 18 percent for the year, driven by a " "rebound in equity and debt underwriting activity. The consumer banking " "division reported net interest income of $89.3 billion, benefiting " "from the elevated rate environment.\n\n" "The filing noted credit provisions of $9.8 billion, reflecting a " "cautious outlook on consumer credit quality. Total assets stood at " "$3.9 trillion, with a Common Equity Tier 1 ratio of 15.0 percent, " "well above regulatory minimums." ), "hash_doc_05": ( "Exxon Mobil Corporation announced a 15 percent increase in Permian " "Basin production output, reaching 620,000 barrels of oil equivalent " "per day. The expansion was attributed to improved drilling efficiency " "and the integration of Pioneer Natural Resources assets.\n\n" "Total upstream production for the quarter averaged 3.7 million barrels " "of oil equivalent per day. Management reiterated its target of " "achieving 4.0 million barrels per day by year-end through organic " "growth and operational optimization.\n\n" "Downstream margins remained under pressure due to elevated refining " "costs and softer demand in European markets. The company maintained " "its quarterly dividend at $0.95 per share." ), "hash_doc_08": ( "The Federal Reserve held its benchmark interest rate steady at the " "5.25 to 5.50 percent range following its January 2025 policy meeting, " "in line with market expectations. The decision was unanimous among " "voting members of the Federal Open Market Committee.\n\n" "In the accompanying statement, the committee acknowledged continued " "progress on inflation, with the core Personal Consumption Expenditures " "index declining to 2.6 percent. Chair Jerome Powell signaled that rate " "cuts could begin as early as the second quarter if disinflation trends " "persist.\n\n" "Treasury yields fell modestly following the announcement, with the " "10-year note declining 5 basis points to 4.12 percent. Equity markets " "rallied on the dovish forward guidance, with the S&P 500 gaining " "0.8 percent in after-hours trading." ), "hash_doc_09": ( "Trade tensions between the United States and China escalated after " "the White House proposed a new round of tariffs targeting advanced " "semiconductor equipment and AI-related technology exports. The " "proposed measures would expand existing restrictions on chip " "manufacturing tools.\n\n" "Beijing responded with a statement warning of retaliatory measures " "on US agricultural and energy exports. Analysts noted that the " "escalation could disrupt supply chains for major technology companies " "with significant manufacturing operations in China.\n\n" "Shares of semiconductor equipment makers declined 3 to 5 percent in " "pre-market trading. Apple, which assembles the majority of its " "iPhones in China, saw its stock dip 1.2 percent on concerns about " "potential supply chain disruptions." ), "hash_doc_10": ( "JPMorgan Chase reported a 20 percent increase in investment banking " "fees for the fourth quarter, driven by a surge in mergers and " "acquisitions advisory revenue and a recovery in initial public " "offering activity.\n\n" "The bank advised on several high-profile transactions during the " "quarter, including three deals valued at over $10 billion each. " "Equity capital markets revenue doubled compared to the prior year " "period as IPO volumes returned to pre-pandemic levels.\n\n" "Fixed income trading revenue rose 8 percent, supported by elevated " "volatility in interest rate and credit markets. The bank's total " "markets revenue reached $7.1 billion for the quarter, exceeding " "consensus estimates by approximately 5 percent." ), } def seed_minio() -> None: """Upload sample normalized text files to the stonks-normalized bucket.""" client = Minio( os.environ.get("MINIO_ENDPOINT", "minio:9000"), access_key=os.environ.get("MINIO_ACCESS_KEY", "minioadmin"), secret_key=os.environ.get("MINIO_SECRET_KEY", "minioadmin"), secure=os.environ.get("MINIO_SECURE", "false").lower() == "true", ) # Ensure bucket exists (should already be created by minio-bucket-init Job) if not client.bucket_exists(BUCKET): client.make_bucket(BUCKET) uploaded = 0 for content_hash, text in NORMALIZED_TEXTS.items(): key = f"{content_hash}.txt" data = text.encode("utf-8") client.put_object( BUCKET, key, BytesIO(data), length=len(data), content_type="text/plain", ) uploaded += 1 print(f" uploaded {BUCKET}/{key} ({len(data)} bytes)") print(f"Seeded {uploaded} normalized text files into {BUCKET}.") if __name__ == "__main__": seed_minio()